Detecting Multimodal AUC distributions in HNSCC

We currently assign sensitivities by tail-assignment, however, there may be cases with distinctive multimodal distributions that are indicative of different response models.

We'll begin by testing the method on beatAML data where we have much greater number of observations, and therefore more well defined distributions.

$$ p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }} e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} } $$
In [285]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
In [286]:
import pandas as pd 
import numpy as np
from sklearn.mixture import GMM
import seaborn as sbn
from matplotlib import pyplot as plt
import warnings

# ------ for mut models ---------
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
import statsmodels.api as sm
from sklearn import metrics

warnings.filterwarnings("ignore")
In [287]:
aml_aucs = pd.read_csv('./../data/beatAML_aucs.csv')[['inhibitor','lab_id','auc']].drop_duplicates()

#aml_aucs = pd.read_csv('./../data/beatAML_AUCs_subset.csv')
In [288]:
aml_aucs.head()
Out[288]:
inhibitor lab_id auc
0 17-AAG (Tanespimycin) 12-00211 225.918025
7 17-AAG (Tanespimycin) 12-00219 135.264409
14 17-AAG (Tanespimycin) 12-00258 164.561227
21 17-AAG (Tanespimycin) 12-00262 111.555971
28 17-AAG (Tanespimycin) 12-00268 226.805281
In [289]:
def add_normal_plot(mu, s, weight, c, ax, auc_max=300): 
    '''
    mu = mean
    s = standard deviation
    ax = matplotlib axes to add to 
    c = color {[r b g c y ...]} <str> 
    '''
    
    x = np.arange(0, auc_max, 1)
    y = (1/(2*np.pi*s**2)**(0.5))*np.exp( - (x-mu)**2/(2*s**2) ) * weight
    
    ax = ax.plot(x,y, color=c, label='mean: %.1f, std: %.1f' %(mu, s))

def get_color():
    '''
    
    '''
    for c in ['r','b','g','c','y']: 
        yield c
    
In [290]:
gen = get_color()
gen.__next__()
Out[290]:
'r'
In [291]:
def test_multimodal_fits(X, ntests=10, kmax=5, plot=True, inhib=None, override_k=False): 
    '''
    
    '''
    
    res = {x:[] for x in ['k', 'aic', 'bic']}
    for k in range(1,kmax): 
        for i in range(ntests): 
            
            #print('k: %d' %k)
            gmm = GMM(n_components=k, n_init=1)
            gmm.fit(X)
            res['k'].append( k )
            res['aic'].append( gmm.aic(X) )
            res['bic'].append( gmm.bic(X) )
            
    res = pd.DataFrame( res )
    
    best_k = res[res.bic == np.min(res.bic)].k.unique()[0] if not override_k else override_k
    gmm_best = GMM(n_components=best_k,n_init=20) 
    gmm_best.fit(X)
    P = gmm_best.predict(X)
    
    if plot:         
        nbins = 50
        bin_ = np.arange(0,np.max(X),np.max(X)/nbins)
        
        scalar_to_make_pretty = 0.25 # since our fitted Gaussians are normalized to their weights, they appear smaller
        weights_ = scalar_to_make_pretty/len(X)
        
        f, axs = plt.subplots(1,3,figsize=(15,5)) 
        sbn.distplot(X, bins=bin_, ax=axs[0]).set_title('AUC distribution')
        sbn.scatterplot(x='k', y='bic', alpha=0.3, data=res, ax=axs[1]).set_title('BIC vs K')
        
        clas = 0
        for weight, mean, covars, c in zip(gmm_best.weights_, gmm_best.means_, gmm_best.covars_, get_color()): 
            sbn.distplot(X[P==clas], bins=bin_, kde=False, color=c, ax=axs[2], label='AUC', hist_kws={'weights': [weights_]*len(X[P==clas])})
            add_normal_plot(mean[0], (covars[0])**0.5, weight, c, axs[2], auc_max=np.max(X))
            clas+=1
        
        axs[2].set_title('Optimal GMM fit')
        
        plt.legend()
        plt.suptitle(inhib)
        
        print('Number of assays (aucs): %d' %len(X))
        print('Optimal K: %d [BIC=%.1f]' %(best_k, np.min(res.bic)))
        print('GMM fit:\n\tMixture Weights: %r\n\tMeans: %r\n\tVariances: %r' %(gmm_best.weights_.ravel(), gmm_best.means_.ravel(), gmm_best.covars_.ravel()))
        print('Class counts: %r' %['class %d: %d' %(cl, len(X[P==cl])) for cl in list(set(P))])
        
        plt.show()
        
    return P
In [292]:
for inhib in aml_aucs.inhibitor.unique(): 
    inhib_dat = aml_aucs[aml_aucs.inhibitor == inhib]
    if inhib_dat.shape[0] > 450: 
        print('------------------------------------------------')
        print('Inhibitor:  %s' %inhib)
        print('------------------------------------------------')
        AUCS =  inhib_dat.auc.values.reshape(-1,1)
        test_multimodal_fits(AUCS, ntests=10, kmax=6, inhib=inhib, plot=True)
    
------------------------------------------------
Inhibitor:  17-AAG (Tanespimycin)
------------------------------------------------
Number of assays (aucs): 422
Optimal K: 1 [BIC=4443.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 154.50949899])
	Variances: array([ 2128.91559614])
Class counts: ['class 0: 422']
------------------------------------------------
Inhibitor:  A-674563
------------------------------------------------
Number of assays (aucs): 438
Optimal K: 1 [BIC=4651.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 151.07041673])
	Variances: array([ 2329.23070046])
Class counts: ['class 0: 438']
------------------------------------------------
Inhibitor:  Afatinib (BIBW-2992)
------------------------------------------------
Number of assays (aucs): 434
Optimal K: 1 [BIC=4401.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 175.66455291])
	Variances: array([ 1443.52059549])
Class counts: ['class 0: 434']
------------------------------------------------
Inhibitor:  Alisertib (MLN8237)
------------------------------------------------
Number of assays (aucs): 436
Optimal K: 2 [BIC=4329.8]
GMM fit:
	Mixture Weights: array([ 0.65671761,  0.34328239])
	Means: array([ 246.26264189,  204.20260238])
	Variances: array([  475.64319411,  1821.18446843])
Class counts: ['class 0: 327', 'class 1: 109']
------------------------------------------------
Inhibitor:  AT7519
------------------------------------------------
Number of assays (aucs): 422
Optimal K: 2 [BIC=4229.0]
GMM fit:
	Mixture Weights: array([ 0.75314284,  0.24685716])
	Means: array([ 124.75150735,  178.46550957])
	Variances: array([  563.09991858,  2338.81468744])
Class counts: ['class 0: 358', 'class 1: 64']
------------------------------------------------
Inhibitor:  Axitinib (AG-013736)
------------------------------------------------
Number of assays (aucs): 489
Optimal K: 1 [BIC=5142.2]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 196.08068388])
	Variances: array([ 2105.86622831])
Class counts: ['class 0: 489']
------------------------------------------------
Inhibitor:  AZD1480
------------------------------------------------
Number of assays (aucs): 430
Optimal K: 2 [BIC=4387.8]
GMM fit:
	Mixture Weights: array([ 0.31624174,  0.68375826])
	Means: array([ 191.00020832,  237.56460238])
	Variances: array([ 2623.05171184,   679.35869862])
Class counts: ['class 0: 83', 'class 1: 347']
------------------------------------------------
Inhibitor:  Barasertib (AZD1152-HQPA)
------------------------------------------------
Number of assays (aucs): 434
Optimal K: 3 [BIC=4460.0]
GMM fit:
	Mixture Weights: array([ 0.11282023,  0.42330403,  0.46387574])
	Means: array([ 132.78487016,  207.89923596,  247.66057351])
	Variances: array([ 2142.92992008,   785.09445571,   367.0611694 ])
Class counts: ['class 0: 38', 'class 1: 166', 'class 2: 230']
------------------------------------------------
Inhibitor:  BEZ235
------------------------------------------------
Number of assays (aucs): 423
Optimal K: 1 [BIC=4549.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 155.1396563])
	Variances: array([ 2665.87541679])
Class counts: ['class 0: 423']
------------------------------------------------
Inhibitor:  BMS-345541
------------------------------------------------
Number of assays (aucs): 436
Optimal K: 2 [BIC=4153.7]
GMM fit:
	Mixture Weights: array([ 0.6477877,  0.3522123])
	Means: array([ 239.06803525,  209.11838798])
	Variances: array([  323.31946996,  1320.81784771])
Class counts: ['class 0: 345', 'class 1: 91']
------------------------------------------------
Inhibitor:  Bortezomib (Velcade)
------------------------------------------------
Number of assays (aucs): 448
Optimal K: 1 [BIC=4913.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 158.67686228])
	Variances: array([ 3299.70585455])
Class counts: ['class 0: 448']
------------------------------------------------
Inhibitor:  Bosutinib (SKI-606)
------------------------------------------------
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-292-bbd6af70def9> in <module>()
      6         print('------------------------------------------------')
      7         AUCS =  inhib_dat.auc.values.reshape(-1,1)
----> 8         test_multimodal_fits(AUCS, ntests=10, kmax=6, inhib=inhib, plot=True)
      9 

<ipython-input-291-9324daaa2a62> in test_multimodal_fits(X, ntests, kmax, plot, inhib, override_k)
     10             #print('k: %d' %k)
     11             gmm = GMM(n_components=k, n_init=1)
---> 12             gmm.fit(X)
     13             res['k'].append( k )
     14             res['aic'].append( gmm.aic(X) )

C:\Anaconda3\lib\site-packages\sklearn\mixture\gmm.py in fit(self, X, y)
    595         self
    596         """
--> 597         self._fit(X, y)
    598         return self
    599 

C:\Anaconda3\lib\site-packages\sklearn\mixture\gmm.py in _fit(self, X, y, do_prediction)
    491                 self.means_ = cluster.KMeans(
    492                     n_clusters=self.n_components,
--> 493                     random_state=self.random_state).fit(X).cluster_centers_
    494                 if self.verbose > 1:
    495                     print('\tMeans have been initialized.')

C:\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in fit(self, X, y)
    887                 tol=self.tol, random_state=random_state, copy_x=self.copy_x,
    888                 n_jobs=self.n_jobs, algorithm=self.algorithm,
--> 889                 return_n_iter=True)
    890         return self
    891 

C:\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in k_means(X, n_clusters, init, precompute_distances, n_init, max_iter, verbose, tol, random_state, copy_x, n_jobs, algorithm, return_n_iter)
    343                 X, n_clusters, max_iter=max_iter, init=init, verbose=verbose,
    344                 precompute_distances=precompute_distances, tol=tol,
--> 345                 x_squared_norms=x_squared_norms, random_state=random_state)
    346             # determine if these results are the best so far
    347             if best_inertia is None or inertia < best_inertia:

C:\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in _kmeans_single_elkan(X, n_clusters, max_iter, init, verbose, x_squared_norms, random_state, tol, precompute_distances)
    392     # init
    393     centers = _init_centroids(X, n_clusters, init, random_state=random_state,
--> 394                               x_squared_norms=x_squared_norms)
    395     centers = np.ascontiguousarray(centers)
    396     if verbose:

C:\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in _init_centroids(X, k, init, random_state, x_squared_norms, init_size)
    679     if isinstance(init, string_types) and init == 'k-means++':
    680         centers = _k_init(X, k, random_state=random_state,
--> 681                           x_squared_norms=x_squared_norms)
    682     elif isinstance(init, string_types) and init == 'random':
    683         seeds = random_state.permutation(n_samples)[:k]

C:\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials)
    112         # Compute distances to center candidates
    113         distance_to_candidates = euclidean_distances(
--> 114             X[candidate_ids], X, Y_norm_squared=x_squared_norms, squared=True)
    115 
    116         # Decide which candidate is the best

C:\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py in euclidean_distances(X, Y, Y_norm_squared, squared, X_norm_squared)
    244 
    245     distances = safe_sparse_dot(X, Y.T, dense_output=True)
--> 246     distances *= -2
    247     distances += XX
    248     distances += YY

KeyboardInterrupt: 

Save subset of okay'd data

In [293]:
okay_data = ['YM-155', 'Vandetanib (ZD6474)', 'Trametinib (GSK1120212)', 'Sunitinib','Sorafenib', 'Selumetinib (AZD6244)', 'Nilotinib', 'JAK Inhibitor I', 'Pazopanib (GW786034)', 'Elesclomol', 'Dasatinib']

dat = aml_aucs[aml_aucs.inhibitor.isin(okay_data)]
dat.to_csv('./../data/beatAML_AUCs_subset.csv')

Test HNSCC distribution

The next step is to ask, can we model these drug responses across cancer types? As in, are the distributions that are well defined by the beatAML similarly define the HNSCC distribution?

We will try to test this by setting our null hypothesis as:

Let X be the underlying set distribution such that: $$ AUC_{beatAML} \in X $$

$$ H_0: AUC_{HNSCC} \in X $$

and $$ H_1: AUC_{HNSCC} \notin X $$

We will use permutation testing, and reject cases where alpha=0.05.

In [294]:
HNSCC_all = pd.read_csv('./../data/HNSCC_all_functional_data.csv')
HNSCC_auc = HNSCC_all[['lab_id','inhibitor','auc','call']].drop_duplicates()
HNSCC_auc.head()
Out[294]:
lab_id inhibitor auc call
0 10004 JQ1;BEZ235 1.044426 int
1 10004 Lucitanib 2.809226 res
2 10004 Resveratrol 2.794546 int
3 10004 Pelitinib (EKB-569) 1.221167 sens
4 10004 Bicalutamide 2.742498 int
In [297]:
shared_drugs = list( set(HNSCC_all.inhibitor) & set(aml_aucs.inhibitor) )

shared_drugs
Out[297]:
['Rapamycin',
 'BI-2536',
 'PP242',
 'Entrectinib',
 'Panobinostat',
 'Tozasertib (VX-680)',
 'BEZ235',
 'A-674563',
 '17-AAG (Tanespimycin)',
 'Elesclomol',
 'JNJ-28312141',
 'Nilotinib',
 'STO609',
 'Lenvatinib',
 'Pelitinib (EKB-569)',
 'Selumetinib (AZD6244)',
 'GDC-0941',
 'Neratinib (HKI-272)',
 'NVP-ADW742',
 'SCH-772984',
 'MK-2206',
 'Imatinib',
 'JQ1',
 'Lapatinib',
 'Lenalidomide',
 'Sorafenib',
 'YM-155',
 'MGCD-265',
 'Canertinib (CI-1033)',
 'TAK-659',
 'Taselisib (GDC-0032)',
 'Bortezomib (Velcade)',
 'Erlotinib',
 'PI-103',
 'Selinexor',
 'Gefitinib',
 'INK-128',
 'Lestaurtinib (CEP-701)',
 'MLN120B',
 'Flavopiridol',
 'GSK-1838705A']
In [ ]:
def permutation_test(x, y, n=1e5, verbose=True, return_prob=True, alpha=0.05):
    '''
    two tailed permutation test to see if y is apart of same model as x 
    
    returns the probability that y is not drawn from the same underlying model (or True if prob > 0.05 and return_prob=True)
    
    if pval > alpha -> x,y may not have been drawn from same distribution (alpha probability not drawn)
    '''
    
    delta_mean = np.abs( np.mean(x) - np.mean(y) )
    permutation_means = []

    for i in range(int(n)):
        if (i % 333 == 0) and verbose: print('Running permutations...[%.2f%%]' %(i/n*100), end='\r')
        perms = np.random.permutation(np.append(x,y))
        perm_y = perms[:len(y)]
        perm_x = perms[len(y):]
        permutation_means.append(np.abs(np.mean(perm_y) - np.mean(perm_x)))
    print()

    pval = np.sum( permutation_means >= delta_mean ) / len( permutation_means )
    return pval if return_prob else pval >= alpha

def test_distribution_similarity(aml, hnscc, inhib=None, verbose=True, perms=1e5): 
    '''
    
    '''
    if verbose: print('---------------------------------------------------------')
    if verbose: print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    if verbose: print(inhib)
    if verbose: print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    if verbose: print('---------------------------------------------------------')
    if verbose: print()
        
    print('HNSCC assays:    %d' %hnscc.shape[0])
    print('AML assays:      %d' %aml.shape[0])
        
    test_multimodal_fits(aml, ntests=10, kmax=6, inhib=inhib, plot=True)
    test_multimodal_fits(hnscc, ntests=10, kmax=6, inhib=inhib, plot=True)
    test_multimodal_fits(np.append(aml.ravel(),hnscc.ravel()).reshape(-1,1), ntests=10, kmax=6, inhib=inhib + ' hnscc-aml', plot=True)
    
    nbins=50
    bin_ = np.arange(0,300,300/nbins)

    plt.figure()
    plt.hist(aml_,color='blue',label='AML_AUC',normed=True,alpha=0.4,bins=bin_)
    plt.hist(hnscc_,color='red',label='HNSCC_AUC',normed=True,alpha=0.4,bins=bin_)
    plt.legend()
    plt.show()
    
    pval = permutation_test(aml_, hnscc_, n=perms, verbose=True, return_prob=True)
    if verbose: print('Dasatinib permutation test p-value: %f' %pval)
    
In [305]:
for inhib in shared_drugs: 
    aml_ = aml_aucs[aml_aucs.inhibitor == inhib].drop_duplicates().auc.values.reshape(-1,1)
    hnscc_ = 100*HNSCC_all[HNSCC_all.inhibitor == inhib].auc.drop_duplicates().values.reshape(-1,1) #hnscc is weighted by 1/100
    
    if aml_.shape[0] + hnscc_.shape[0] > 200: 
        test_distribution_similarity(aml_, hnscc_, inhib=inhib, perms=1e6)
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Rapamycin
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      478
Number of assays (aucs): 478
Optimal K: 1 [BIC=5206.2]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 175.98806926])
	Variances: array([ 3065.14456621])
Class counts: ['class 0: 478']
Number of assays (aucs): 17
Optimal K: 1 [BIC=188.8]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 212.46846562])
	Variances: array([ 2787.11227825])
Class counts: ['class 0: 17']
Number of assays (aucs): 495
Optimal K: 1 [BIC=5396.5]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 177.24093136])
	Variances: array([ 3099.73122504])
Class counts: ['class 0: 495']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.007603
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
PP242
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      432
Number of assays (aucs): 432
Optimal K: 1 [BIC=4575.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 139.92493152])
	Variances: array([ 2264.95055139])
Class counts: ['class 0: 432']
Number of assays (aucs): 17
Optimal K: 1 [BIC=175.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 199.8533003])
	Variances: array([ 1241.56981283])
Class counts: ['class 0: 17']
Number of assays (aucs): 449
Optimal K: 1 [BIC=4773.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 142.19393434])
	Variances: array([ 2357.03266083])
Class counts: ['class 0: 449']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000002
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Panobinostat
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      208
Number of assays (aucs): 208
Optimal K: 2 [BIC=2279.8]
GMM fit:
	Mixture Weights: array([ 0.73669121,  0.26330879])
	Means: array([  60.96557798,  182.07599155])
	Variances: array([  932.05620099,  4012.50516911])
Class counts: ['class 0: 160', 'class 1: 48']
Number of assays (aucs): 17
Optimal K: 1 [BIC=187.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 123.02367096])
	Variances: array([ 2528.13453794])
Class counts: ['class 0: 17']
Number of assays (aucs): 225
Optimal K: 2 [BIC=2476.5]
GMM fit:
	Mixture Weights: array([ 0.32148666,  0.67851334])
	Means: array([ 167.97707387,   60.62082131])
	Variances: array([ 4224.29257575,   919.90057886])
Class counts: ['class 0: 62', 'class 1: 163']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.073498
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Tozasertib (VX-680)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      436
Number of assays (aucs): 436
Optimal K: 2 [BIC=4440.2]
GMM fit:
	Mixture Weights: array([ 0.66291631,  0.33708369])
	Means: array([ 227.08966462,  184.24468239])
	Variances: array([  718.34101867,  2144.77726346])
Class counts: ['class 0: 352', 'class 1: 84']
Number of assays (aucs): 17
Optimal K: 1 [BIC=167.2]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 236.30828056])
	Variances: array([ 783.72554039])
Class counts: ['class 0: 17']
Number of assays (aucs): 453
Optimal K: 2 [BIC=4609.7]
GMM fit:
	Mixture Weights: array([ 0.66176668,  0.33823332])
	Means: array([ 227.95382197,  185.32477745])
	Variances: array([  717.04548478,  2120.82972573])
Class counts: ['class 0: 364', 'class 1: 89']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.017023
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
BEZ235
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      423
Number of assays (aucs): 423
Optimal K: 1 [BIC=4549.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 155.1396563])
	Variances: array([ 2665.87541679])
Class counts: ['class 0: 423']
Number of assays (aucs): 17
Optimal K: 1 [BIC=179.9]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 126.14004924])
	Variances: array([ 1652.66669288])
Class counts: ['class 0: 17']
Number of assays (aucs): 440
Optimal K: 1 [BIC=4730.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 154.01921693])
	Variances: array([ 2657.965633])
Class counts: ['class 0: 440']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.022372
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
A-674563
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      438
Number of assays (aucs): 438
Optimal K: 1 [BIC=4651.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 151.07041673])
	Variances: array([ 2329.23070046])
Class counts: ['class 0: 438']
Number of assays (aucs): 17
Optimal K: 3 [BIC=175.6]
GMM fit:
	Mixture Weights: array([ 0.29981426,  0.05882353,  0.64136221])
	Means: array([ 166.80590253,   90.79631394,  223.38192797])
	Variances: array([  1.33839394e+02,   1.00000002e-03,   2.87875215e+02])
Class counts: ['class 0: 5', 'class 1: 1', 'class 2: 11']
Number of assays (aucs): 455
Optimal K: 1 [BIC=4841.7]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 152.84701231])
	Variances: array([ 2383.50534535])
Class counts: ['class 0: 455']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000069
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17-AAG (Tanespimycin)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      422
Number of assays (aucs): 422
Optimal K: 1 [BIC=4443.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 154.50949899])
	Variances: array([ 2128.91559614])
Class counts: ['class 0: 422']
Number of assays (aucs): 17
Optimal K: 1 [BIC=187.5]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 142.10263706])
	Variances: array([ 2593.17199568])
Class counts: ['class 0: 17']
Number of assays (aucs): 439
Optimal K: 1 [BIC=4627.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 154.02905103])
	Variances: array([ 2152.62365561])
Class counts: ['class 0: 439']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.281617
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Elesclomol
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      401
Number of assays (aucs): 401
Optimal K: 3 [BIC=4037.7]
GMM fit:
	Mixture Weights: array([ 0.39585498,  0.09239963,  0.51174539])
	Means: array([  66.66850544,  203.44968902,   26.350298  ])
	Variances: array([  612.43965939,  2510.66009017,   166.22601469])
Class counts: ['class 0: 144', 'class 1: 36', 'class 2: 221']
Number of assays (aucs): 17
Optimal K: 1 [BIC=193.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 169.47474365])
	Variances: array([ 3705.46371316])
Class counts: ['class 0: 17']
Number of assays (aucs): 418
Optimal K: 3 [BIC=4273.2]
GMM fit:
	Mixture Weights: array([ 0.49958492,  0.11522109,  0.38519398])
	Means: array([  26.80930943,  204.40916182,   68.10812667])
	Variances: array([  174.72590117,  2172.78306839,   663.39530539])
Class counts: ['class 0: 223', 'class 1: 48', 'class 2: 147']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
JNJ-28312141
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      419
Number of assays (aucs): 419
Optimal K: 1 [BIC=4639.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 143.79579035])
	Variances: array([ 3661.97483019])
Class counts: ['class 0: 419']
Number of assays (aucs): 17
Optimal K: 1 [BIC=149.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 251.8606813])
	Variances: array([ 272.87665377])
Class counts: ['class 0: 17']
Number of assays (aucs): 436
Optimal K: 1 [BIC=4862.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 148.00932968])
	Variances: array([ 3967.41284807])
Class counts: ['class 0: 436']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Nilotinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    16
AML assays:      495
Number of assays (aucs): 495
Optimal K: 2 [BIC=5019.4]
GMM fit:
	Mixture Weights: array([ 0.49843481,  0.50156519])
	Means: array([ 255.37294147,  199.69481155])
	Variances: array([  463.14206147,  1000.14457906])
Class counts: ['class 0: 258', 'class 1: 237']
Number of assays (aucs): 16
Optimal K: 3 [BIC=136.6]
GMM fit:
	Mixture Weights: array([ 0.52113369,  0.41636631,  0.0625    ])
	Means: array([ 279.71264701,  258.40193221,  202.10509219])
	Variances: array([  1.44996077e+01,   1.06721442e+02,   1.00000009e-03])
Class counts: ['class 0: 9', 'class 1: 6', 'class 2: 1']
Number of assays (aucs): 511
Optimal K: 2 [BIC=5177.0]
GMM fit:
	Mixture Weights: array([ 0.50427975,  0.49572025])
	Means: array([ 200.50355557,  257.28957676])
	Variances: array([ 995.182392  ,  425.29868289])
Class counts: ['class 0: 242', 'class 1: 269']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000108
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
STO609
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      436
Number of assays (aucs): 436
Optimal K: 4 [BIC=3949.4]
GMM fit:
	Mixture Weights: array([ 0.42245225,  0.28890567,  0.23947687,  0.04916521])
	Means: array([ 263.3165585 ,  235.74890513,  281.66201871,  192.92558307])
	Variances: array([   82.24762101,   374.673727  ,    14.75988029,  1410.20555971])
Class counts: ['class 0: 193', 'class 1: 115', 'class 2: 115', 'class 3: 13']
Number of assays (aucs): 17
Optimal K: 3 [BIC=143.5]
GMM fit:
	Mixture Weights: array([ 0.82352942,  0.11764706,  0.05882352])
	Means: array([ 274.72440711,  149.54348404,  240.9085346 ])
	Variances: array([  4.40641204e+01,   4.94380372e+01,   1.00000012e-03])
Class counts: ['class 0: 14', 'class 1: 2', 'class 2: 1']
Number of assays (aucs): 453
Optimal K: 4 [BIC=4104.0]
GMM fit:
	Mixture Weights: array([ 0.40382749,  0.28167732,  0.06104071,  0.25345448])
	Means: array([ 263.7345739 ,  237.29861719,  191.5813359 ,  281.35284522])
	Variances: array([   75.33062864,   332.80109707,  1300.5900311 ,    16.24119344])
Class counts: ['class 0: 193', 'class 1: 114', 'class 2: 20', 'class 3: 126']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.799715
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Lenvatinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      279
Number of assays (aucs): 279
Optimal K: 1 [BIC=2946.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 172.08187128])
	Variances: array([ 2166.85878667])
Class counts: ['class 0: 279']
Number of assays (aucs): 17
Optimal K: 3 [BIC=157.6]
GMM fit:
	Mixture Weights: array([ 0.05882353,  0.61223519,  0.32894128])
	Means: array([  11.18374953,  269.70397771,  239.76000524])
	Variances: array([  1.00000000e-03,   5.07855991e+01,   4.51429048e+02])
Class counts: ['class 0: 1', 'class 1: 12', 'class 2: 4']
Number of assays (aucs): 296
Optimal K: 1 [BIC=3172.5]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 176.2494687])
	Variances: array([ 2544.44532082])
Class counts: ['class 0: 296']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Pelitinib (EKB-569)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      431
Number of assays (aucs): 431
Optimal K: 1 [BIC=4461.9]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 163.13544434])
	Variances: array([ 1783.6839003])
Class counts: ['class 0: 431']
Number of assays (aucs): 17
Optimal K: 3 [BIC=190.9]
GMM fit:
	Mixture Weights: array([ 0.41260953,  0.41183301,  0.17555746])
	Means: array([ 132.79205761,  254.52541837,  184.53144187])
	Variances: array([ 287.93931569,  330.0789751 ,    4.68212506])
Class counts: ['class 0: 7', 'class 1: 7', 'class 2: 3']
Number of assays (aucs): 448
Optimal K: 1 [BIC=4659.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 164.2310968])
	Variances: array([ 1872.48593435])
Class counts: ['class 0: 448']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.007141
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Selumetinib (AZD6244)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    16
AML assays:      433
Number of assays (aucs): 433
Optimal K: 5 [BIC=4831.9]
GMM fit:
	Mixture Weights: array([ 0.12914925,  0.19407319,  0.29483783,  0.15333835,  0.22860138])
	Means: array([ 281.77366842,  151.06440004,  239.00738032,   95.63644597,
        175.89948627])
	Variances: array([   27.70942088,  1987.53537673,   557.58173208,  1286.40737068,
        1293.20958498])
Class counts: ['class 0: 61', 'class 1: 47', 'class 2: 141', 'class 3: 71', 'class 4: 113']
Number of assays (aucs): 16
Optimal K: 4 [BIC=161.9]
GMM fit:
	Mixture Weights: array([ 0.0625    ,  0.49999807,  0.18750216,  0.24999976])
	Means: array([ 151.75152427,  277.73319668,  231.16232383,  188.68807924])
	Variances: array([  1.00000005e-03,   5.60297376e+01,   4.42146550e+01,
         2.29101756e+01])
Class counts: ['class 0: 1', 'class 1: 8', 'class 2: 3', 'class 3: 4']
Number of assays (aucs): 449
Optimal K: 3 [BIC=4975.7]
GMM fit:
	Mixture Weights: array([ 0.48555661,  0.36851018,  0.14593321])
	Means: array([ 219.50574191,  122.69079105,  280.683043  ])
	Variances: array([ 1190.76074528,  1798.41165434,    37.33787695])
Class counts: ['class 0: 212', 'class 1: 163', 'class 2: 74']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.004938
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GDC-0941
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      403
Number of assays (aucs): 403
Optimal K: 1 [BIC=4211.5]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 155.88303782])
	Variances: array([ 1964.208886])
Class counts: ['class 0: 403']
Number of assays (aucs): 17
Optimal K: 1 [BIC=178.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 132.14193862])
	Variances: array([ 1515.04185105])
Class counts: ['class 0: 17']
Number of assays (aucs): 420
Optimal K: 1 [BIC=4389.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 154.92208856])
	Variances: array([ 1967.91888361])
Class counts: ['class 0: 420']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.030145
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Neratinib (HKI-272)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      433
Number of assays (aucs): 433
Optimal K: 1 [BIC=4559.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 161.97390563])
	Variances: array([ 2127.98147262])
Class counts: ['class 0: 433']
Number of assays (aucs): 17
Optimal K: 1 [BIC=181.7]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 198.79088486])
	Variances: array([ 1841.24814213])
Class counts: ['class 0: 17']
Number of assays (aucs): 450
Optimal K: 1 [BIC=4745.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 163.36476929])
	Variances: array([ 2166.42222135])
Class counts: ['class 0: 450']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.001357
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
NVP-ADW742
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      415
Number of assays (aucs): 415
Optimal K: 2 [BIC=4184.6]
GMM fit:
	Mixture Weights: array([ 0.61551839,  0.38448161])
	Means: array([ 231.11355308,  197.1292676 ])
	Variances: array([  648.79332419,  1978.51115865])
Class counts: ['class 0: 321', 'class 1: 94']
Number of assays (aucs): 17
Optimal K: 2 [BIC=169.9]
GMM fit:
	Mixture Weights: array([ 0.54126442,  0.45873558])
	Means: array([ 221.63512323,  278.63532448])
	Variances: array([ 432.16215046,   46.35083761])
Class counts: ['class 0: 9', 'class 1: 8']
Number of assays (aucs): 432
Optimal K: 2 [BIC=4365.6]
GMM fit:
	Mixture Weights: array([ 0.60334907,  0.39665093])
	Means: array([ 232.40294323,  199.16071867])
	Variances: array([  682.75000853,  1955.82577503])
Class counts: ['class 0: 331', 'class 1: 101']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.002262
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
SCH-772984
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    15
AML assays:      358
Number of assays (aucs): 358
Optimal K: 2 [BIC=3972.3]
GMM fit:
	Mixture Weights: array([ 0.53164461,  0.46835539])
	Means: array([ 133.21946959,  239.32744565])
	Variances: array([ 1464.75815448,  1072.03915504])
Class counts: ['class 0: 191', 'class 1: 167']
Number of assays (aucs): 15
Optimal K: 1 [BIC=162.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 235.87690688])
	Variances: array([ 2045.93746769])
Class counts: ['class 0: 15']
Number of assays (aucs): 373
Optimal K: 2 [BIC=4136.8]
GMM fit:
	Mixture Weights: array([ 0.47144656,  0.52855344])
	Means: array([ 241.4704015 ,  134.71698867])
	Variances: array([ 1016.21311892,  1497.85415365])
Class counts: ['class 0: 174', 'class 1: 199']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.001393
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
MK-2206
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      420
Number of assays (aucs): 420
Optimal K: 2 [BIC=4559.8]
GMM fit:
	Mixture Weights: array([ 0.50416044,  0.49583956])
	Means: array([ 245.37914994,  166.09399066])
	Variances: array([  823.23414807,  2397.58311345])
Class counts: ['class 0: 228', 'class 1: 192']
Number of assays (aucs): 17
Optimal K: 1 [BIC=168.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 250.91065639])
	Variances: array([ 819.96981588])
Class counts: ['class 0: 17']
Number of assays (aucs): 437
Optimal K: 2 [BIC=4737.2]
GMM fit:
	Mixture Weights: array([ 0.48341481,  0.51658519])
	Means: array([ 166.94731455,  246.05068468])
	Variances: array([ 2427.71585776,   818.13812116])
Class counts: ['class 0: 193', 'class 1: 244']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.001325
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Imatinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    12
AML assays:      511
Number of assays (aucs): 511
Optimal K: 3 [BIC=5165.5]
GMM fit:
	Mixture Weights: array([ 0.43545984,  0.13128803,  0.43325213])
	Means: array([ 252.6459565 ,  172.61882634,  227.41765714])
	Variances: array([  470.01793071,  3189.19280153,   881.99645221])
Class counts: ['class 0: 275', 'class 1: 40', 'class 2: 196']
Number of assays (aucs): 12
Optimal K: 5 [BIC=63.2]
GMM fit:
	Mixture Weights: array([ 0.3333333 ,  0.25      ,  0.16666667,  0.1666667 ,  0.08333333])
	Means: array([ 280.87470159,  286.21753762,  276.41450267,  282.75862477,
        284.81676973])
	Variances: array([ 0.04638306,  0.00857553,  0.31042927,  0.09145358,  0.001     ])
Class counts: ['class 0: 4', 'class 1: 3', 'class 2: 2', 'class 3: 2', 'class 4: 1']
Number of assays (aucs): 523
Optimal K: 3 [BIC=5291.1]
GMM fit:
	Mixture Weights: array([ 0.43476605,  0.11966333,  0.44557063])
	Means: array([ 256.13009846,  171.68584727,  225.4992784 ])
	Variances: array([  428.41479533,  3348.43668277,   872.19695912])
Class counts: ['class 0: 273', 'class 1: 33', 'class 2: 217']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000245
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
JQ1
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    16
AML assays:      287
Number of assays (aucs): 287
Optimal K: 1 [BIC=2998.5]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 138.15140233])
	Variances: array([ 1940.26046556])
Class counts: ['class 0: 287']
Number of assays (aucs): 16
Optimal K: 1 [BIC=157.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 254.57338597])
	Variances: array([ 761.13249173])
Class counts: ['class 0: 16']
Number of assays (aucs): 303
Optimal K: 1 [BIC=3248.7]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 144.29909784])
	Variances: array([ 2555.92902767])
Class counts: ['class 0: 303']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Lapatinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      485
Number of assays (aucs): 485
Optimal K: 2 [BIC=4735.0]
GMM fit:
	Mixture Weights: array([ 0.37159515,  0.62840485])
	Means: array([ 212.13786003,  243.08928529])
	Variances: array([ 1530.34760838,   455.4886863 ])
Class counts: ['class 0: 101', 'class 1: 384']
Number of assays (aucs): 17
Optimal K: 3 [BIC=179.8]
GMM fit:
	Mixture Weights: array([ 0.55571084,  0.38546564,  0.05882353])
	Means: array([ 189.13441101,  253.50710248,  106.25441812])
	Variances: array([  2.67662536e+02,   2.89904895e+02,   1.00000003e-03])
Class counts: ['class 0: 10', 'class 1: 6', 'class 2: 1']
Number of assays (aucs): 502
Optimal K: 2 [BIC=4920.3]
GMM fit:
	Mixture Weights: array([ 0.38851519,  0.61148481])
	Means: array([ 210.89062867,  243.49125273])
	Variances: array([ 1518.85155766,   456.61022085])
Class counts: ['class 0: 120', 'class 1: 382']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.007350
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Sorafenib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      505
Number of assays (aucs): 505
Optimal K: 2 [BIC=5406.4]
GMM fit:
	Mixture Weights: array([ 0.42476075,  0.57523925])
	Means: array([ 160.43097496,  220.70232329])
	Variances: array([ 2577.01244736,  1141.4331894 ])
Class counts: ['class 0: 173', 'class 1: 332']
Number of assays (aucs): 17
Optimal K: 1 [BIC=151.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 259.10442709])
	Variances: array([ 310.30304888])
Class counts: ['class 0: 17']
Number of assays (aucs): 522
Optimal K: 2 [BIC=5596.3]
GMM fit:
	Mixture Weights: array([ 0.42367792,  0.57632208])
	Means: array([ 161.71438387,  223.26230414])
	Variances: array([ 2608.67606836,  1148.36266613])
Class counts: ['class 0: 180', 'class 1: 342']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000003
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
YM-155
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      438
Number of assays (aucs): 438
Optimal K: 4 [BIC=4293.3]
GMM fit:
	Mixture Weights: array([ 0.41133395,  0.24887023,  0.04197242,  0.29782339])
	Means: array([ 248.08561307,  279.42325154,  175.08078398,  212.2240844 ])
	Variances: array([  287.4991658 ,    36.56865149,  2604.53227968,   717.58205777])
Class counts: ['class 0: 195', 'class 1: 120', 'class 2: 6', 'class 3: 117']
Number of assays (aucs): 17
Optimal K: 4 [BIC=147.8]
GMM fit:
	Mixture Weights: array([ 0.17647059,  0.64517302,  0.11953286,  0.05882353])
	Means: array([  96.79862275,    5.64480744,   32.12945874,  110.62030442])
	Variances: array([  2.70989575e+00,   1.72868280e+01,   5.21418920e+01,
         1.00000003e-03])
Class counts: ['class 0: 3', 'class 1: 11', 'class 2: 2', 'class 3: 1']
Number of assays (aucs): 455
Optimal K: 5 [BIC=4584.5]
GMM fit:
	Mixture Weights: array([ 0.3802057 ,  0.06189971,  0.32212163,  0.02371761,  0.21205535])
	Means: array([ 251.9199857 ,  152.70193629,  215.44998899,    5.62152847,
        280.52063419])
	Variances: array([  257.23465882,  3798.34020179,   629.88878117,    17.11952828,
          26.5929325 ])
Class counts: ['class 0: 180', 'class 1: 15', 'class 2: 140', 'class 3: 11', 'class 4: 109']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
MGCD-265
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      421
Number of assays (aucs): 421
Optimal K: 2 [BIC=4437.4]
GMM fit:
	Mixture Weights: array([ 0.60255465,  0.39744535])
	Means: array([ 226.71869457,  170.23319985])
	Variances: array([  883.29490895,  2583.86128434])
Class counts: ['class 0: 292', 'class 1: 129']
Number of assays (aucs): 17
Optimal K: 1 [BIC=145.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 268.94054008])
	Variances: array([ 212.59646359])
Class counts: ['class 0: 17']
Number of assays (aucs): 438
Optimal K: 2 [BIC=4631.8]
GMM fit:
	Mixture Weights: array([ 0.58903911,  0.41096089])
	Means: array([ 230.08630365,  173.37186641])
	Variances: array([  912.57303272,  2630.93250245])
Class counts: ['class 0: 301', 'class 1: 137']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000001
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Canertinib (CI-1033)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      432
Number of assays (aucs): 432
Optimal K: 2 [BIC=4345.4]
GMM fit:
	Mixture Weights: array([ 0.31247043,  0.68752957])
	Means: array([ 195.47543171,  231.11659019])
	Variances: array([ 2461.64893922,   638.25502618])
Class counts: ['class 0: 69', 'class 1: 363']
Number of assays (aucs): 17
Optimal K: 1 [BIC=185.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 200.46679617])
	Variances: array([ 2247.4254111])
Class counts: ['class 0: 17']
Number of assays (aucs): 449
Optimal K: 2 [BIC=4534.1]
GMM fit:
	Mixture Weights: array([ 0.66382662,  0.33617338])
	Means: array([ 231.47388764,  195.08521019])
	Variances: array([  650.49622461,  2369.28933194])
Class counts: ['class 0: 368', 'class 1: 81']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.043336
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
TAK-659
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    8
AML assays:      193
Number of assays (aucs): 193
Optimal K: 1 [BIC=2109.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 155.06092417])
	Variances: array([ 3094.16553919])
Class counts: ['class 0: 193']
Number of assays (aucs): 8
Optimal K: 2 [BIC=58.4]
GMM fit:
	Mixture Weights: array([ 0.74999561,  0.25000439])
	Means: array([ 283.16986407,  263.23393437])
	Variances: array([  6.4000196 ,  12.91418561])
Class counts: ['class 0: 6', 'class 1: 2']
Number of assays (aucs): 201
Optimal K: 1 [BIC=2224.3]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 159.9614165])
	Variances: array([ 3553.65788946])
Class counts: ['class 0: 201']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000000
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Taselisib (GDC-0032)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      207
Number of assays (aucs): 207
Optimal K: 1 [BIC=2265.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 128.15938961])
	Variances: array([ 3142.84479615])
Class counts: ['class 0: 207']
Number of assays (aucs): 17
Optimal K: 4 [BIC=178.8]
GMM fit:
	Mixture Weights: array([ 0.36702066,  0.17646673,  0.05882353,  0.39768908])
	Means: array([ 108.77194249,  198.36159968,  239.14604521,   94.24517987])
	Variances: array([  3.80257434e+02,   9.51419130e+00,   1.00000012e-03,
         3.00966769e+02])
Class counts: ['class 0: 5', 'class 1: 3', 'class 2: 1', 'class 3: 8']
Number of assays (aucs): 224
Optimal K: 2 [BIC=2445.2]
GMM fit:
	Mixture Weights: array([ 0.5740193,  0.4259807])
	Means: array([  93.08909991,  175.11708849])
	Variances: array([ 1145.11238332,  1849.12351792])
Class counts: ['class 0: 138', 'class 1: 86']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.905428
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Bortezomib (Velcade)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      448
Number of assays (aucs): 448
Optimal K: 1 [BIC=4913.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 158.67686228])
	Variances: array([ 3299.70585455])
Class counts: ['class 0: 448']
Number of assays (aucs): 17
Optimal K: 1 [BIC=184.1]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 102.60985538])
	Variances: array([ 2119.32136828])
Class counts: ['class 0: 17']
Number of assays (aucs): 465
Optimal K: 1 [BIC=5108.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 156.62710074])
	Variances: array([ 3367.27448531])
Class counts: ['class 0: 465']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.000081
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Erlotinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    16
AML assays:      494
Number of assays (aucs): 494
Optimal K: 2 [BIC=4859.8]
GMM fit:
	Mixture Weights: array([ 0.61873948,  0.38126052])
	Means: array([ 245.28723167,  206.3491596 ])
	Variances: array([  454.2151368 ,  1366.94034476])
Class counts: ['class 0: 360', 'class 1: 134']
Number of assays (aucs): 16
Optimal K: 2 [BIC=167.2]
GMM fit:
	Mixture Weights: array([ 0.56753554,  0.43246446])
	Means: array([ 250.87091923,  174.57280577])
	Variances: array([ 504.39099167,   80.49344992])
Class counts: ['class 0: 9', 'class 1: 7']
Number of assays (aucs): 510
Optimal K: 2 [BIC=5027.4]
GMM fit:
	Mixture Weights: array([ 0.61394137,  0.38605863])
	Means: array([ 245.55550283,  205.38523382])
	Variances: array([  456.70019842,  1347.79182011])
Class counts: ['class 0: 366', 'class 1: 144']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.149934
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
PI-103
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      432
Number of assays (aucs): 432
Optimal K: 1 [BIC=4592.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 140.35699877])
	Variances: array([ 2356.66433103])
Class counts: ['class 0: 432']
Number of assays (aucs): 17
Optimal K: 1 [BIC=178.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 137.60132803])
	Variances: array([ 1479.84516686])
Class counts: ['class 0: 17']
Number of assays (aucs): 449
Optimal K: 1 [BIC=4766.6]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 140.2526638])
	Variances: array([ 2323.74290509])
Class counts: ['class 0: 449']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.818541
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Gefitinib
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      492
Number of assays (aucs): 492
Optimal K: 2 [BIC=4781.7]
GMM fit:
	Mixture Weights: array([ 0.66870742,  0.33129258])
	Means: array([ 253.3594795 ,  216.04175397])
	Variances: array([  403.86181731,  1573.84847907])
Class counts: ['class 0: 390', 'class 1: 102']
Number of assays (aucs): 17
Optimal K: 1 [BIC=174.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 222.58422479])
	Variances: array([ 1196.61206746])
Class counts: ['class 0: 17']
Number of assays (aucs): 509
Optimal K: 2 [BIC=4956.3]
GMM fit:
	Mixture Weights: array([ 0.34915529,  0.65084471])
	Means: array([ 215.87722042,  253.52710561])
	Variances: array([ 1519.95498714,   401.52908211])
Class counts: ['class 0: 117', 'class 1: 392']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.025187
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
INK-128
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      436
Number of assays (aucs): 436
Optimal K: 1 [BIC=4684.9]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 128.60194517])
	Variances: array([ 2642.44890348])
Class counts: ['class 0: 436']
Number of assays (aucs): 17
Optimal K: 3 [BIC=179.7]
GMM fit:
	Mixture Weights: array([ 0.64628954,  0.23578104,  0.11792943])
	Means: array([  81.5148204 ,  164.54098077,   38.07824608])
	Variances: array([  63.92442256,  495.70757433,   76.44918192])
Class counts: ['class 0: 11', 'class 1: 4', 'class 2: 2']
Number of assays (aucs): 453
Optimal K: 2 [BIC=4865.8]
GMM fit:
	Mixture Weights: array([ 0.45921674,  0.54078326])
	Means: array([ 160.86749645,   98.93841395])
	Variances: array([ 2243.83081391,  1232.842173  ])
Class counts: ['class 0: 190', 'class 1: 263']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.010058
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
MLN120B
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    10
AML assays:      429
Number of assays (aucs): 429
Optimal K: 2 [BIC=4298.1]
GMM fit:
	Mixture Weights: array([ 0.29395022,  0.70604978])
	Means: array([ 198.04981999,  246.5592976 ])
	Variances: array([ 2515.00648811,   512.24469362])
Class counts: ['class 0: 81', 'class 1: 348']
Number of assays (aucs): 10
Optimal K: 2 [BIC=72.6]
GMM fit:
	Mixture Weights: array([ 0.1,  0.9])
	Means: array([ 240.37675582,  278.17497378])
	Variances: array([  1.00000013e-03,   4.43807201e+01])
Class counts: ['class 0: 1', 'class 1: 9']
Number of assays (aucs): 439
Optimal K: 3 [BIC=4401.2]
GMM fit:
	Mixture Weights: array([ 0.08507338,  0.49587301,  0.4190536 ])
	Means: array([ 153.71423971,  253.34160741,  225.64310956])
	Variances: array([ 2977.22015572,   411.5327024 ,   898.41616319])
Class counts: ['class 0: 25', 'class 1: 269', 'class 2: 145']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.002866
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Flavopiridol
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      433
Number of assays (aucs): 433
Optimal K: 2 [BIC=4672.4]
GMM fit:
	Mixture Weights: array([ 0.66508816,  0.33491184])
	Means: array([ 103.66651482,  186.28874937])
	Variances: array([  988.81044947,  3253.09454755])
Class counts: ['class 0: 318', 'class 1: 115']
Number of assays (aucs): 17
Optimal K: 1 [BIC=175.4]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 124.76139218])
	Variances: array([ 1267.53140337])
Class counts: ['class 0: 17']
Number of assays (aucs): 450
Optimal K: 2 [BIC=4846.2]
GMM fit:
	Mixture Weights: array([ 0.33377889,  0.66622111])
	Means: array([ 185.03408958,  104.06270298])
	Variances: array([ 3240.20720134,   982.04746674])
Class counts: ['class 0: 119', 'class 1: 331']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.641317
---------------------------------------------------------
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GSK-1838705A
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
---------------------------------------------------------
HNSCC assays:    17
AML assays:      429
Number of assays (aucs): 429
Optimal K: 2 [BIC=4379.6]
GMM fit:
	Mixture Weights: array([ 0.35558822,  0.64441178])
	Means: array([ 193.51224229,  226.23377434])
	Variances: array([ 2589.28618581,   762.09370287])
Class counts: ['class 0: 73', 'class 1: 356']
Number of assays (aucs): 17
Optimal K: 1 [BIC=177.0]
GMM fit:
	Mixture Weights: array([ 1.])
	Means: array([ 223.29207733])
	Variances: array([ 1396.9281905])
Class counts: ['class 0: 17']
Number of assays (aucs): 446
Optimal K: 2 [BIC=4553.7]
GMM fit:
	Mixture Weights: array([ 0.36067726,  0.63932274])
	Means: array([ 193.97388105,  226.75212404])
	Variances: array([ 2514.78181752,   774.64203683])
Class counts: ['class 0: 77', 'class 1: 369']
Running permutations...[100.00%]
Dasatinib permutation test p-value: 0.385719
In [295]:
AML_Dasatinib = aml_aucs[aml_aucs['inhibitor'] == 'Sorafenib']
print('Number of AML Sorafenib assays: %d' %len(AML_Dasatinib))
AML_Dasatinib.head()
Number of AML Sorafenib assays: 505
Out[295]:
inhibitor lab_id auc
427287 Sorafenib 11-00261 232.933886
427294 Sorafenib 11-00487 181.911965
427301 Sorafenib 12-00023 189.034206
427308 Sorafenib 12-00032 184.248305
427315 Sorafenib 12-00066 171.013986
In [ ]:
HNSCC_Dasatinib = HNSCC_auc[HNSCC_auc['inhibitor'] == 'Sorafenib'].drop_duplicates()
print('Number of HNSCC Sorafenib assays: %d' %len(HNSCC_Dasatinib))
HNSCC_Dasatinib.head()
In [ ]:
print('------------------------------------------------')
print('Inhibitor:  %s' %'AML_Dasatinib')
print('------------------------------------------------')
aml_= AML_Dasatinib.auc.values.reshape(-1,1)
c = test_multimodal_fits(aml_, ntests=10, kmax=6, inhib=inhib, plot=True)

print('------------------------------------------------')
print('Inhibitor:  %s' %'HNSCC_Dasatinib')
print('------------------------------------------------')
hnscc_=100*HNSCC_Dasatinib.auc.values.reshape(-1,1)
c = test_multimodal_fits(hnscc_, ntests=10, kmax=6, inhib=inhib, plot=True)
In [ ]:
nbins=50
bin_ = np.arange(0,300,300/nbins)

plt.figure()
plt.hist(aml_,color='blue',label='AML_AUC',normed=True,alpha=0.4,bins=bin_)
plt.hist(hnscc_,color='red',label='HNSCC_AUC',normed=True,alpha=0.4,bins=bin_)
plt.legend()
plt.show()
In [ ]:
 
In [ ]:
pval = permutation_test(aml_, hnscc_, n=1e6, verbose=True, return_prob=True)

print('Dasatinib permutation test p-value: %f' %pval)

Drug response associated mutations

In [ ]:
aml_var_all = pd.read_csv('./../data/vizome_beatAML_variants.csv', skiprows=[0,1,2])
#[aml_var_all.tumor_only == True]
aml_var = aml_var_all.dropna()[['sample_id', 'gene', 'chr','start','end','ref','alt','variant','sift']]

aml_var = aml_var.rename(columns={'sample_id':'lab_id'})

print(aml_var.head())

print(aml_var[['chr','start','end','ref','alt']].head())

print('Number of genes mutated: %d' %len(aml_var.gene.unique()))
print('Number of unique mutations: %d' %aml_var[['chr','start','end','ref','alt']].drop_duplicates().shape[0])

mut_cnt = aml_var.groupby('gene').count()['lab_id'].sort_values()
mult_patients = mut_cnt.index[mut_cnt > 20]
#aml_variants = aml_var[aml_var['lab_id'].isin(mult_patients)]

shared_mutations = [x for x in mult_patients]
print(shared_mutations)
print(aml_var.groupby('gene').count()['lab_id'].sort_values())

aml_var = aml_var[aml_var['gene'].isin(shared_mutations)]

aml_var.head()
In [ ]:
# convert variants to matrix format 

genes = pd.Series( aml_var.gene.unique() )

res = []
for lab_id in aml_var.lab_id.unique(): 
    D = aml_var[aml_var['lab_id'] == lab_id]
    x = genes.isin( D['gene'] ).tolist()
    res.append( [lab_id] + [str(int(xx)) for xx in x] )

res = pd.DataFrame( res, columns=['lab_id'] + genes.tolist() )

res.head()

MAT_VAR = res 

print('matrix variant shape: %s' %str(res.shape))
print(MAT_VAR.head())
In [282]:
print( aml_aucs.head() )

aml_aucs['lab_id']=aml_aucs['lab_id'].astype(str)
MAT_VAR['lab_id']=MAT_VAR['lab_id'].astype(str)

aml_auc_w_var = aml_aucs.merge(right=MAT_VAR, how='inner', on=['lab_id'])
aml_auc_w_var.head(5)
                inhibitor    lab_id         auc
0   17-AAG (Tanespimycin)  12-00211  225.918025
7   17-AAG (Tanespimycin)  12-00219  135.264409
14  17-AAG (Tanespimycin)  12-00258  164.561227
21  17-AAG (Tanespimycin)  12-00262  111.555971
28  17-AAG (Tanespimycin)  12-00268  226.805281
Out[282]:
inhibitor lab_id auc NRAS DNMT3A IDH1 U2AF1 JAK2
0 17-AAG (Tanespimycin) 13-00118 217.469453 0 1 0 0 0
1 A-674563 13-00118 200.671721 0 1 0 0 0
2 ABT-737 13-00118 210.841965 0 1 0 0 0
3 Afatinib (BIBW-2992) 13-00118 184.158486 0 1 0 0 0
4 AKT Inhibitor IV 13-00118 141.415353 0 1 0 0 0
In [283]:
Dasatinib = aml_auc_w_var[aml_auc_w_var['inhibitor'] == 'Dasatinib']

print('Dasatinib auc and variants shape: %s' %str(Dasatinib.shape))

aucs = Dasatinib.auc.values.reshape(-1,1)
X = Dasatinib[genes]
Y = test_multimodal_fits(aucs, inhib='Dasatinib', override_k=2)

X_embedded = TSNE(n_components=2).fit_transform(X)
df = pd.DataFrame( {'x1':X_embedded[:,0], 'x2':X_embedded[:,1], 'label':Y} )

plt.figure() 
sbn.scatterplot(x='x1', y='x2', hue='label', data=df)
plt.show()
Dasatinib auc and variants shape: (88, 8)
Number of assays (aucs): 88
Optimal K: 2 [BIC=982.6]
GMM fit:
	Mixture Weights: array([ 0.47590632,  0.52409368])
	Means: array([ 119.23863497,  219.31488033])
	Variances: array([  934.06293972,  1520.06524569])
Class counts: ['class 0: 42', 'class 1: 46']
In [284]:
reg = ElasticNet().fit(X, Y)

yhat = reg.predict(X)

pred = (yhat > 0.5)*1

acc = sum([a == b for a,b in zip(pred, Y)]*1) / len(Y)
print('Elastic Net accuracy: %.2f' %acc)

fpr, tpr, thresholds = metrics.roc_curve(Y, yhat, pos_label=1)
EN_auc = metrics.auc(fpr, tpr)
print('Elastic Net AUC: %.2f' %EN_auc)

plt.figure()
plt.plot(fpr,tpr,'r--')
plt.show()



logit = LogisticRegression().fit(X,Y)

yhat = logit.predict(X)
pred = (yhat > 0.5)*1
print(pred)

acc = sum([a == b for a,b in zip(pred, Y)]*1) / len(Y)
print('Logistic Regression accuracy: %.2f' %acc)

fpr, tpr, thresholds = metrics.roc_curve(Y, yhat, pos_label=1)
EN_auc = metrics.auc(fpr, tpr)
print('Logistic Regression AUC: %.2f' %EN_auc)

plt.figure()
plt.plot(fpr,tpr,'r--')
plt.show()

print('---------------------------------------------------')
print('STATSMODEL LOGIT')
print('---------------------------------------------------')

X2 = X.values.astype(int)
logit_sm = sm.Logit(Y, sm.add_constant(X2)).fit(method='lbfgs', maxiter=500)
yhat = logit_sm.predict(sm.add_constant(X2))

cutoff = 0.9
pred = (yhat > cutoff)*1

acc = sum([a == b for a,b in zip(pred, Y)]*1) / len(Y)
print('Logistic Regression accuracy: %.2f' %acc)

fpr, tpr, thresholds = metrics.roc_curve(Y, yhat, pos_label=1)
EN_auc = metrics.auc(fpr, tpr)
print('Logistic Regression AUC: %.2f' %EN_auc)

plt.figure()
plt.plot(fpr,tpr,'r--')
plt.show()

print( logit_sm.summary() )

#print(yhat)

#build_RF_variant_model(X,Y)
Elastic Net accuracy: 0.52
Elastic Net AUC: 0.50
[0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 1 0 1 0 0
 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0
 0 0 0 1 0 0 1 1 0 0 1 1 0 0]
Logistic Regression accuracy: 0.70
Logistic Regression AUC: 0.71
---------------------------------------------------
STATSMODEL LOGIT
---------------------------------------------------
Logistic Regression accuracy: 0.60
Logistic Regression AUC: 0.76
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                      y   No. Observations:                   88
Model:                          Logit   Df Residuals:                       82
Method:                           MLE   Df Model:                            5
Date:                Tue, 24 Sep 2019   Pseudo R-squ.:                  0.2082
Time:                        22:56:55   Log-Likelihood:                -48.224
converged:                       True   LL-Null:                       -60.906
                                        LLR p-value:                 0.0001185
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.5231      1.106      0.473      0.636      -1.645       2.691
x1             0.5562      1.011      0.550      0.582      -1.425       2.537
x2            -1.2607      1.044     -1.208      0.227      -3.306       0.785
x3            16.2144    898.489      0.018      0.986   -1744.793    1777.221
x4            -1.0338      1.326     -0.780      0.435      -3.632       1.564
x5            -0.7719      1.210     -0.638      0.523      -3.143       1.599
==============================================================================

Possibly complete quasi-separation: A fraction 0.12 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.
In [ ]: